State Results

# clear labels for versions
versions <- tibble(
  version = c("v1", "v2", "v3", "v4", "v5", "v6", "v7"),
  vlabel = factor(
    c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
    "$\\beta$ Centered at Survey Value",
    "$Pr(S_1|untested)$ Centered at Survey Value",
    "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
        "$\\beta$ Centered at Survey Value (Spline Smoothed)",
         "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
        "$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
    ),
    levels = c(
        "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
        "$\\beta$ Centered at Survey Value",
         "$Pr(S_1|untested)$ Centered at Survey Value",
        "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
        "$\\beta$ Centered at Survey Value (Spline Smoothed)",
         "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
        "$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
    ) )
)


# read state-level results
targets_dir <- here("_targets")

state_res <- tar_read(state_v1, 
                   store =targets_dir) %>% 
  bind_rows(
    tar_read(state_v2,
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(state_v3, 
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(state_v4, 
             store =targets_dir)
   )%>%
   bind_rows(
    tar_read(state_v5,
             store =targets_dir)
  )%>%
   bind_rows(
    tar_read(state_v6,
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(state_v7,
             store =targets_dir)
  )

covidestim <- tar_read(covidestim_biweekly_state,
                        store =targets_dir)

dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))
codes <- read_csv(here('data/demographic/statecodes.csv'))

# last biweek for first model is biweek 24
last_biweek_old_model <- dates %>% 
  group_by(biweek) %>%
  summarize(mindate=min(date),maxdate=max(date))  %>%
  filter("2021-11-30" >= mindate & "2021-11-30" <= maxdate)


cat("Last two week interval with previous model was biweek", last_biweek_old_model$biweek,
    "\ndates", paste0("(", last_biweek_old_model$mindate, ", ",
                    last_biweek_old_model$maxdate, ")"))
## Last two week interval with previous model was biweek 24 
## dates (2021-11-19, 2021-12-02)
end_old_model <- last_biweek_old_model$maxdate


state_res <- state_res %>%
  rename(state=fips) %>%
  left_join(versions) %>%
  mutate(state=toupper(state)) %>%
  left_join(covidestim, relationship= "many-to-many") %>%
  left_join(dates, relationship = "many-to-many")  %>%
  rename(name=state_name) %>%
  group_by(biweek) %>%
  mutate(mindate=min(date),
         maxdate=max(date)) %>%
  ungroup()


if(filter_versions){
  state_res <- state_res %>%
  filter(version %in% c("v3", "v5", "v6", "v7"))  %>%
    mutate(vlabel=case_when(
      version == "v7" ~  "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
      version == "v5" ~ "$\\beta$ Centered at Survey Value",
      version =="v6" ~  "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
      version == "v3" ~ "$Pr(S_1|untested)$ Centered at Survey Value"
    ))

}
theme_c <- function(...){ 
   # font <- "Helvetica"   #assign font family up front
    theme_bw() %+replace%    #replace elements we want to change
    
    theme(
      
      
      #text elements
      plot.title = element_text(             #title
                   size = 16,                #set font size
                   face = 'bold',            #bold typeface
                   hjust = .5,
                   vjust = 3),               
      
      plot.subtitle = element_text(          #subtitle
                   size = 12,
                   hjust = .5,
                   face = 'italic',
                   vjust = 3),               #font size
      
      axis.title = element_text(             #axis titles
                   size = 12),               #font size
      
      axis.text = element_text(              #axis text
                   size = 9),
      legend.text = element_text(size = 12),
      # t, r, b, l
      plot.margin = unit(c(1,.5,.5,.5), "cm"),
      legend.position = "right",
      strip.text.x = element_text(size = 11, face = "bold", color="white"),
      strip.text.y = element_text(size = 11, face = "bold", color="white",angle=270),
      strip.background = element_rect(fill = "#3E3D3D")
      ) %+replace%
      theme(...)
   
}

Summarizing Concordance with Covidestim

Below, we see that although the implementation where \(\Pr(S_1|untested)\) was at the survey as well as the implementation did not vary by state or date, the mean simulation interval width was actually lower, indicating the greater concordance was not merely a result of wider bounds.

state_res %>%
  select(-c(date)) %>%
  distinct() %>%
  mutate(interval_length =exp_cases_ub - exp_cases_lb) %>%
  group_by(vlabel) %>%
  summarize(mean_interval_length = mean(interval_length)) %>%
  arrange(desc(mean_interval_length))
state_res_differenced <- state_res %>%
  select(-date) %>% distinct() %>%
  group_by(state, vlabel) %>%
  mutate(exp_cases_median_differenced = exp_cases_median - lag(exp_cases_median, n =1, order_by = biweek),
         infections_differenced = infections - lag(infections, n = 1, order_by=biweek)) %>%
  select(state, name, vlabel, contains('differenced'), biweek) %>%
  filter(!is.na(exp_cases_median_differenced)) %>%
  filter(biweek <=25)


# if lag is NEGATIVE, covidestim LEADS wastewater
# if lag is POSITIVE, covidestim LAGS wastewater
# since by the documentation, 'the lag k value returned by ccf(x, y) estimates
# the correlation between x[t+k] and y[t].'

get_max_lag_by_version <- function(input_df_for_version,
                                   varnames = c("infections_differenced",
                                  "exp_cases_median_differenced"),
                                  comparison = "bias corrected") {
  
  var1 <- varnames[1]
  var2 <-varnames[2]
  lab <- unique(input_df_for_version$vlabel)
  
  cross_correlations <- ccf(
    pull(input_df_for_version, var1),
     pull(input_df_for_version, var2), 
                            plot = FALSE,
    lag.max=3)
  

  ccf_res <- tibble(lag = cross_correlations$lag[,,1], 
                  correlation=cross_correlations$acf[,,1]) 
  
  m <- ccf_res %>% filter(correlation == max(correlation))
  
  tibble(vlabel = unique(input_df_for_version$vlabel),
         fips = unique(input_df_for_version$state),
         max_lag = m$lag,
         max_corr = m$correlation,
         comparison = comparison,
         name = unique(input_df_for_version$name))

}


cross_corr <- state_res_differenced %>%
  group_by(state, vlabel) %>%
  group_split() %>%
  map_df(get_max_lag_by_version) 
  
states_with_lag <- cross_corr %>%
  filter(max_lag >  0 & 
           vlabel == "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$" & 
           max_corr > .55) %>% 
  pull(fips) %>%
  unique()
# corrected %>%
#   mutate(in_interval = case_when(
#     exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
#     exp_cases_lb  > infections  ~ "Below Interval",
#     exp_cases_ub < infections ~ "Above Interval"
#   )) %>%
#   filter(!is.na(in_interval)) %>%
#   group_by(in_interval, state, vlabel) %>%
#   summarize(n_per_county=n()) %>%
#   group_by(vlabel, in_interval) %>%
#   summarize(n_per_version = sum(n_per_county)) %>%
#   group_by(vlabel) %>%
#   mutate(total = sum(n_per_version)) %>%
#   ungroup() %>%
#   mutate(prop=n_per_version/total)



by_version <- state_res %>%
 # filter(biweek >=6) %>% 
  select(-date) %>%
  distinct() %>% 
  mutate(in_interval = case_when(
    exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
    exp_cases_lb  > infections  ~ "Below Interval",
    exp_cases_ub < infections ~ "Above Interval"
  )) %>%
  filter(!is.na(in_interval)) %>%
  group_by(in_interval, vlabel) %>%
  summarize(n=n()) %>%
  group_by(vlabel) %>%
  mutate(total = sum(n)) %>%
  mutate(prop=n/total) 

labels <- by_version %>%
  arrange(prop) %>%
  pull(vlabel) %>%
  as.character() %>%
  unique()


by_version %>%
  mutate(in_interval = factor(in_interval, 
                             levels = c("Above Interval",
                                        "In Interval",
                                        "Below Interval"
                                        ))) %>%
   ggplot(aes(x= fct_reorder(vlabel,prop,max), 
             fill = in_interval, y =prop)) + 
  geom_bar(stat="identity",position="stack") +
  theme_c() +
  coord_flip()+ scale_fill_manual(values=c("In Interval" = "#79D2AF",
                             "Above Interval" = "#D2AF79",
                      "Below Interval"="#7997D2"),
                       breaks = c("Below Interval",
                               "In Interval",
                               "Above Interval")) +
  labs(x="", 
       y= "Proportion",
       fill = "Covidestim Median:",
       title = "Proportion of Simulation Intervals Where Covidestim Median\nWas Within, Above, or Below the Median") +
  theme_c() +
  theme_c(legend.position="right",
          legend.title = element_text(face="bold", size = 15),
          legend.text= element_text( size = 15),
          legend.spacing.y = unit(3, 'pt'),
          axis.text.y = element_text(size = 17, hjust=1)) +
  theme(plot.title = element_text(size = 20),
          plot.subtitle = element_text(size=18, 
                                       face='italic', 
                                       margin=margin(4,0,4,0)),
        axis.text.x=element_text(size=12),
        axis.title = element_text(size=14)) +
  scale_x_discrete(labels = (TeX(labels)))

ggsave(here('figures/concordance-covidestim-state.pdf'))
by_version %>%
  group_by(vlabel) %>%
  filter(in_interval == "In Interval") %>%
  mutate(n_interval = n) %>%
  ggplot(aes(y=fct_reorder(vlabel,prop), x=prop)) +
  geom_text(aes(label=round(prop,3), x= prop+.03), 
             angle=0) +
  geom_bar(stat="identity", fill="#596281") +
  scale_y_discrete(breaks= unique(by_version$vlabel),
                   labels=function(x)TeX(x)) +
  scale_x_continuous(expand=c(0,0), 
                     limits=c(0,.9), n.breaks=7)  +
  theme_c(axis.text.y = element_text(size = 13, hjust=1),
          axis.title.x = element_text(size=14),
          axis.text.x=element_text(size=10)) +
  labs(y="",
       x="Proportion Containing the Covidestim Median",
       title="Proportion Containing the Covidestim Median",
       subtitle = "By Implementation")

Simulation Intervals Over Time

all_versions <- state_res %>% 
  group_by(state) %>%
  summarize(state_n=n_distinct(version)) %>%
  filter(state_n == max(state_n)) %>%
  pull(state)
state_res %>%
   filter(state %in% sample(unique(all_versions),5))  %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub),
             alpha = .8,
             show.legend=FALSE,
             fill="#596281") +
   geom_linerange(aes(xmin = mindate,
                      xmax=maxdate,
                      y = positive,
                      color = 'obs'),
                  size=.5) +
  facet_grid(name~vlabel, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed)),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "By State") +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

Compare Versions

All Versions

subset <- state_res %>%
  filter(vlabel ==  "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$") 

# pal <- c("#247C90", "red")
# 
# names(pal) <- c(as.character(unique(subset$vlabel)), "Covidestim")


state_res %>%
   filter(state %in% sample(unique(all_versions),5))  %>% 
  filter(version %in% c("v2", "v5", "v6", "v7")) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill=vlabel),
             alpha = .8,
             show.legend=FALSE #,
           #  fill="#596281"
             ) +
   geom_linerange(aes(xmin = mindate,
                      xmax=maxdate,
                      y = positive,
                      color = 'obs'),
                  size=.5) +
  facet_grid(name~vlabel, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed)),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "By State") +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

Wu et al.’s Priors versus CTIS Priors (that do not vary)

colors_labs <- state_res %>%
  filter(version %in% c("v1","v7")) %>%
  pull(vlabel) %>% unique()

state_res %>%
  filter(version %in% c("v1","v7") & state %in% sample(unique(.$state),15)) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill=vlabel),
             alpha = .6
             ) +
  facet_wrap(~name, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed),
               nrow=3),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  viridis::scale_fill_viridis(option="magma", begin=.1,end=.9, discrete=TRUE,
                              breaks =(colors_labs),
                              labels=TeX(as.character(colors_labs)))  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "By State",
       fill = "") 

Compare States

# comp_state_pal <- c("#247C90", "red")
# 
# names(comp_state_pal) <- c('Probabilistic Bias Analysis', "Covidestim")
# 
# 
# vlab_for_pal <- unique(subset$vlabel) %>% as.character()

color_for_bias <- pal[[which(names(pal)=="$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$")]]

comp_state_pal<- c(color_for_bias,"red")
names(comp_state_pal) <- c('Probabilistic Bias Analysis', "Covidestim")


subset %>%
   # mutate(vlabel = gsub("$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$",
   #                     "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", as.character(vlabel))) %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill = 'Probabilistic Bias Analysis'),
             alpha = .8,
             show.legend=FALSE) +
  geom_ribbon(aes(x = date,
                  fill = 'Covidestim',
                  ymin = infections.lo,
                  ymax = infections.hi),
              alpha = .6) +
  geom_vline(xintercept=end_old_model,
             linetype=2,
             color="darkred",
             linewidth=.5) +
  facet_wrap(~name, ncol=4, scales = "free",
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3.5 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x=element_text(size=7),
          axis.text.y = element_text(size=4.5),
          legend.position = "top",
          legend.text=element_text(size=12),
          strip.text.x=element_text(size=11,
                                    face='plain',
                                    color='white')) +
  scale_fill_manual(values =comp_state_pal, name='')  +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by State")) +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))

ggsave(here('figures/intervals-and-covidestim-all-states.pdf'))

Compare States that Have a Lag

# states_with_lag

subset %>%
    filter(state %in% states_with_lag & biweek <= 25) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill = 'Probabilistic Bias Analysis'),
             alpha = .8,
             show.legend=FALSE) +
  geom_ribbon(aes(x = date,
                  fill = 'Covidestim',
                  ymin = infections.lo,
                  ymax = infections.hi),
              alpha = .6) +
  # geom_vline(xintercept=end_old_model,
  #            linetype=2,
  #            color="darkred",
  #            linewidth=.5) +
  facet_wrap(~name, ncol=3, scales = "free",
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3.5 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x=element_text(size=7),
          axis.text.y = element_text(size=4.5),
          legend.position = "top",
          legend.text=element_text(size=12),
          strip.text.x=element_text(size=11,
                                    face='plain',
                                    color='white')) +
  scale_fill_manual(values =comp_state_pal, name='')  +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by State\nin States with a Lag")) +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))

ggsave(here('figures/intervals-and-covidestim-states-lag.pdf'))

Test Positivity and Positive Tests in States with a Lag

############################################
# POSITIVE TESTS AND TEST POSITIVITY
############################################
pltlist <- subset %>%
  filter(biweek<=25) %>%
  filter(state %in% states_with_lag) %>%
    mutate(testpos = positive/total) %>%
  group_by(across(-date)) %>%
  summarize(date=min(date) + days(7)) %>%
  group_by(state) %>%
  mutate(adj = (1/max(testpos)) * max(positive)) %>%
  group_split() %>%
  map(~ {
    adj <-unique(.x$adj)
    .x %>%
      ggplot(aes(x=date))+
    geom_line(aes(y=positive, color = 'Positive Tests')) +
      geom_point(aes(y=positive, color = 'Positive Tests'),
              alpha=.5,
              size=.8) +
    geom_line(aes(y=testpos*adj, color='Test Positivity'),
              linetype=2) +
      geom_point(aes(y=testpos*adj, color='Test Positivity'),
              alpha=.5,
              size=.8) +
    #scale_linetype_discrete(breaks = c(1,1,2)) +
    scale_color_manual(values=c(
                                "Positive Tests"= "#279143",
                                "Test Positivity"= '#E38E4F')) +
    guides(color = guide_legend(override.aes = list(linetype = c(1,2),
                                                    linewidth=c(2,2)))) +
    theme_c() +
    theme(axis.title = element_text(size=10),
          axis.text =element_text(size=9),
          legend.position="none") +
    scale_y_continuous(labels=comma, 
                       sec.axis = sec_axis(
                         ~./adj,
                         name = 'Test Positivity')) +
    labs(color ='',
         y="Positive Tests",
         x= "Date") +
      facet_wrap(~name)
        
  })

legend <- get_legend(
  pltlist[[1]] +
    theme(legend.position = "top",
          legend.text=element_text(size=16))
)


title_gg <- ggplot() + 
  labs(title = "Comparing Trends in Positive Tests and Test Positivity") + 
  theme(plot.title=element_text(face="bold",
                                hjust = .5, 
                                size = 18,
                                margin =margin(5,0,2,0)))


plt <- cowplot::plot_grid(plotlist=pltlist, ncol=3) 

plot_grid(title_gg, legend, plt, rel_heights=c(.03, .03,.94), ncol=1)

ggsave(here('figures/testpos-lag.pdf'))

Ratio of Estimated to Observed

subset %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb/positive,
             ymax = exp_cases_ub/positive,
             fill = vlabel),
             alpha = .9,
             show.legend=FALSE,
             fill= "#247C90") +
  facet_wrap(~state, ncol=5, 
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x=element_text(size=6),
          axis.text.y = element_text(size=8),
          legend.position = "top") +
  scale_fill_manual(values =pal,
                    labels = TeX(names(pal)),
                    name='',
                    breaks="Covidestim")  +
  labs(y = "Ratio of Estimated Infections to Observed",
       x="",
       title = paste0("Ratio of Estimated Infections to Observed by State")) +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))

ggsave(here('figures/ratio-estimated-to-observed-all-states.pdf'))

Comparing States at Specific Two-week Intervals

During Alpha Wave

max_val <- subset %>% 
  # left_join(codes) %>%
  filter(biweek %in% c(7, 13,27)) %>%
  mutate(ratio=exp_cases_ub/positive) %>%
  pull(ratio) %>%
  max()
  
  
plt1 <- subset %>% 
  # left_join(codes) %>%
  filter(biweek == 7) %>%
  ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
             xmin = exp_cases_lb/positive,
             xmax = exp_cases_ub/positive)) +
  geom_errorbar() +
  # geom_point(aes(y=state_name,x=exp_cases_median/positive),
  #            color = "darkred", size=.8, alpha=.5) +
  theme_c() + 
  theme(axis.text.y=element_text(size=10),
        axis.text.x = element_text(size=11),
        axis.title = element_text(size = 17),
        plot.subtitle = element_text(size=14),
        plot.title = element_text(size=17, face="plain")) +
  labs(y="", x="Ratio of Estimated Infections\nto Observed",
       title ="During Two-week Period\nin the Alpha Wave",
       subtitle = "March 26, 2021 through April 8, 2021") +
  xlim(0,max_val+2)

plt1

During Delta Wave

subset %>%
  mutate(ratio = exp_cases_median/positive) %>%
  group_by(biweek) %>%
  summarize(mean = mean(ratio),
            mindate=min(date),
            maxdate=max(date)) %>%
  arrange(desc(mean)) 
plt2 <- subset %>% 
  # left_join(codes) %>%
  filter(biweek == 13) %>%
  ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
             xmin = exp_cases_lb/positive,
             xmax = exp_cases_ub/positive)) +
  geom_errorbar() +
  # geom_point(aes(y=state_name,x=exp_cases_median/positive),
  #            color = "darkred", size=.8, alpha=.5) +
  theme_c() + 
 theme(axis.text.y=element_text(size=10),
        axis.text.x = element_text(size=11),
        axis.title = element_text(size = 17),
        plot.subtitle = element_text(size=14),
        plot.title = element_text(size=17, face="plain")) +
  labs(y="", x="Ratio of Estimated Infections\nto Observed",
       title ="During Two-week Period\nin the Delta Wave",
       subtitle = "June 18, 2021 through July 1, 2021") +
  xlim(0,max_val+2)

plt2

During Omicron Wave

subset %>% 
  left_join(codes) %>%
  filter(biweek == 27) %>%
  pull(date) %>% 
  range()


subset %>% 
  left_join(codes) %>%
  filter(biweek == 13) %>%
  pull(date) %>% 
  range()


plt3 <- subset %>% 
  # left_join(codes) %>%
  filter(biweek == 27) %>%
  ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
             xmin = exp_cases_lb/positive,
             xmax = exp_cases_ub/positive)) +
  geom_errorbar() +
  theme_c() + 
  theme(axis.text.y=element_text(size=10),
        axis.text.x = element_text(size=11),
        axis.title = element_text(size = 17),
        plot.subtitle = element_text(size=14),
        plot.title = element_text(size=17, face="plain")) +
  labs(y="", x="Ratio of Estimated Infections\nto Observed",
       title ="During Two-week Period\nin the Omicron Wave",
       subtitle = "December 31, 2021 through January 14, 2022") +
  xlim(0,max_val+2)

plt3

All Waves Together

plt <- plot_grid(plt1,plt2, plt3, nrow=1, align="none", labels="auto", label_size=19)

title <- ggplot() +
  labs(title = "Ratio of Estimated Infections to Observed Infections") +
  theme_c(plot.title=element_text(size=25))

plot_grid(title, plt, nrow=2, 
          rel_heights= c(.05,.95))

ggsave(here('figures/ratio-estimated-to-observed-multiple-waves.pdf'))
cat("Time interval with the highest ratio of estimated cases to observed: ")
## Time interval with the highest ratio of estimated cases to observed:
subset %>%
  group_by(biweek) %>%
  mutate(ratio=exp_cases_median/positive) %>%
  summarize(mean_ratio=mean(ratio),
         date = paste(range(date),collapse=" to ")) %>%
  slice_max(n=1, order_by=mean_ratio)
subset %>% 
  # left_join(codes) %>%
  filter(biweek %in% c(7, 13, 27))  %>%
  select(-date) %>%
  distinct() %>%
  group_by(state) %>%
  mutate(ord = empirical_testpos[which(biweek==7)]) %>%
  mutate(biweek=factor(biweek)) %>%
  ggplot(aes(y=fct_reorder(state,ord),
             x=empirical_testpos,fill=biweek)) +
  geom_bar(stat="identity",position="dodge")+
  theme_c() +
  theme_c() +
  labs(title = "Testing Positivity by State",
       y="",
       x="Biweekly Testing Positivity",
       fill="") +
  scale_x_continuous(n.breaks=10)

subset %>% 
  # left_join(codes) %>%
  filter(biweek %in% c(7, 13, 27))  %>%
  group_by(biweek) %>%
  mutate(daterange= paste(format(min(date), "%B %d %Y"),
                          "to", 
                          format(max(date), "%B %d %Y")),
         daterange=factor(
           daterange,
           levels=c(
             "March 26 2021 to April 08 2021",
             "June 18 2021 to July 01 2021",
             "December 31 2021 to January 14 2022"))) %>%
  select(-date) %>%
  distinct() %>%
  group_by(state) %>%
  mutate(testrate=total/population,
         ord = testrate[which(biweek==7)]) %>%
  mutate(biweek=factor(biweek)) %>%
  ggplot(aes(y=fct_reorder(name,ord,.desc=TRUE),
             x=testrate,fill=daterange)) +
  geom_bar(stat="identity",position="dodge") +
  theme_c() +
  labs(title = "Testing Rate by State",
       y="",
       x="Biweekly Testing Rate",
       fill="") +
  scale_x_continuous(n.breaks=10)

compare_testrates <- subset %>%
  mutate(testrate=total/population) %>%
  select(-date) %>%
  distinct() %>%
  filter(biweek %in% c(7, 13, 27))  %>%
  select(biweek,state,testrate) %>%
  pivot_wider(names_from=biweek, values_from=testrate, names_prefix="biweek_") %>%
  mutate(omicron_over_alpha = biweek_27/biweek_7,
         omicron_over_delta = biweek_27/biweek_13) %>%
  select(state,contains("omicron")) %>%
  pivot_longer(c(omicron_over_alpha,omicron_over_delta)) %>%
  group_by(name) %>%
  summarize(mean_val = mean(value))

cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_alpha") %>% pull(mean_val), "times the testing rate during this time interval in the alpha wave.") 
## The testing rate during this two-week interval during the omicron wave was 2.440557 times the testing rate during this time interval in the alpha wave.
cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_delta") %>% pull(mean_val), "times the testing rate during this time interval in the delta wave.") 
## The testing rate during this two-week interval during the omicron wave was 4.931176 times the testing rate during this time interval in the delta wave.

Rank Ratio of Estimated to Observed Over Time

ranks <- subset %>%
  mutate(ratio = exp_cases_median/positive,
         tested = total/population) %>%
  # one observation per biweek per state
  select(biweek, date, ratio, name,tested) %>%
  group_by(biweek,ratio,name,tested) %>%
  summarize(date=min(date)) %>%
  # rank for each time interval
  group_by(biweek) %>%
  mutate(rank = rank(ratio),
         rank_tested=rank(-tested))


r <- ranks %>%
  mutate(rank_group = case_when(
    rank <= 10 ~  "Top 10",
    rank > 41 ~ "Bottom 10",
    TRUE ~ "Middle" )) %>%
  group_by(name, rank_group) %>%
  mutate(n=n()) %>%
  group_by(name) %>%
  mutate(total =n(),
         max_group = rank_group[which.max(n)]) %>%
  filter( max(n) >24) %>%
  filter(max_group != "Middle")  %>%
  ungroup()


top_10 <- r %>% filter(rank_group=="Top 10") %>%
  arrange(desc(n)) %>% pull(name) %>%
  unique() %>%paste(collapse=", ")

cat("States that consistently have the lowest ratios:", top_10)
## States that consistently have the lowest ratios: Rhode Island, Massachusetts, District of Columbia, Alaska, New York, Vermont
bottom_10 <- r %>% filter(rank_group=="Bottom 10") %>%
  arrange(desc(n)) %>% pull(name) %>%
  unique() %>%paste(collapse=", ")


cat("States that consistently have the highest ratios:", bottom_10)
## States that consistently have the highest ratios: Mississippi, South Dakota, Oklahoma, Nebraska, Tennessee
end_labs <- r %>%
  ungroup() %>%
  filter(date==max(date)) %>%
  mutate(date = date + days(10)) %>%
  select(name, rank, date) %>%
  ungroup()
##############################
# RANK PLOT OVER TIME
##############################
# 
# set.seed(999)
# 
# pal1 <-viridis_pal(option="viridis", end = .9, begin=.3)(3)
# pal2 <- viridis_pal(option="rocket", end = .9, begin=.3)(2)
# pal3 <- viridis_pal(option="magma", end = .9, begin=.3)(2)
# pal4 <- viridis_pal(option="mako", end = .8, begin=.3)(2)
# pal5 <- viridis_pal(option="cividis", end = .9, begin=.1)(3)
# 
# 
# rankpal <- c(pal1, pal2, pal3,pal4,pal5)
# indexes <- sample.int(length(rankpal), replace=FALSE)
# rankpal <- rankpal[indexes]


set.seed(123)

rankpal <- c("#b4ddd4", "#7b2c31", "#65d04b",
             "#da239b", "#b7d165", "#453fbc", 
             "#658114", "#af3fe8", "#ffb947",
             "#0a60a8", "#f87945", "#16d0ae")

rankpal <- c("#851657", "#be3acd", "#19a71f",
             "#824598", "#ed820a", "#BB8E37",
             "#974810", "#1f84ec", "#d02023",
             "#059dc5", "#f23387", "#16d0ae")

indexes <- sample.int(length(rankpal), replace=FALSE)
rankpal <- rankpal[indexes]



r %>%
 # filter(rank_group!="Middle") %>%
  ggplot() +
  geom_point(aes(x=date,y=rank, 
             group=name,
             color= name),
             size=.5) +
  geom_line(aes(x=date,y=rank, 
             group=name,
             color= name)) +
 # facet_wrap(~max_group) +
  ggrepel::geom_text_repel( aes(x= date-days(4),
                y = rank,
                color=name,
                label = name),
           end_labs,
           min.segment.length=2,
           nudge_y=0,
           hjust=0,
           size=3,
           direction="y",
           force_pull=9,
           box.padding=.15) +
  theme_c(legend.position="none",
          axis.text.x = element_text(angle =0,size=14)) +
  # theme(plot.subtitle =element_text(size=18),
  #       axis.title.x=element_text(size=16),
  #       axis.title.y=element_text(size=16)) +
 # scale_color_brewer(palette="Dark2")  +
  scale_color_manual(values=rankpal) +
  scale_x_date(date_breaks="2 months", date_labels = "%b %Y",
               limits = c(ymd("2021-01-01"),
                          ymd("2022-04-15"))) +
  labs(y = "Rank of Ratio",
       title = "Rank of Estimated Infections to Observed Infections Over Time",
       subtitle = "For States Ranked in the Top or Bottom 10\nfor More Than 80% of Time Intervals",
       x="Date") +
  scale_y_reverse(breaks=seq(1,51, by=9))

Summer Testing Rate

subset %>% 
  mutate(testrate = total/population) %>%
  group_by(biweek) %>% 
  mutate(m = median(testrate),
            date=min(date)) %>%
  ggplot(aes(x=date,y=testrate, group=state)) +
  geom_line(alpha=.2) +
  geom_line(aes(y=m, color='Median'), size=2) +
  labs(y = "Total Number of Tests\nOver Population Size",
       title ="Testing Rate Over Time",
       x="") +
  theme_c() +
  theme(axis.title.x = element_text(size=16),
          axis.title.y  = element_text(size=16),
        legend.position='top') +
  geom_vline(xintercept = ymd("2021-07-16"), linetype = 2, linewidth=.5) +
  geom_vline(xintercept = ymd("2021-06-18"), linetype = 2, linewidth=.5) +
  scale_x_date(date_breaks="2 months",
               date_labels = "%b %Y") +
  scale_color_manual(values=c(Median="darkred"),name='')

ggsave(here('figures/testrate-low-summer.pdf'), height=6,width=12, dpi=400)

Variant Data

m <- state_res %>%
  filter(state == "MI" & version=="v7") %>%
  pull(exp_cases_ub) %>%
  max()


variant <- tar_read(variant, store =targets_dir)

varpal <- c("#7BD8DA","#709f0f", "#e71761", "#9245c8", 
            "#97481c", "#5054b1", "#DA7BA8", "#26B392")

state_res %>%
  filter(state == "MI" & version %in% c("v7")) %>%
 # filter(state %in% sample(corrected$state,5)) %>%
 # filter(biweek >=6) %>%
  group_by(biweek) %>%
  mutate(xmin = min(date),
         xmax = max(date)) %>%
  ungroup() %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill =vlabel),
           #  fill = "#3E3D3D",
             alpha = .5,
             show.legend=FALSE) +
  scale_x_date(date_breaks = "2 months", 
               date_labels = "%b %Y") +
  theme_c(legend.position="top") +
  geom_line(aes(x=week, y=share*m,
                color =variant_category,
                group=variant_category),
            data=variant,
            linewidth=1.3) +
  # scale_fill_manual(values =pal,
  #                   labels = TeX(names(pal)),
  #                    breaks='Covidestim',
  #                   name='')  +
  scale_y_continuous(sec.axis = sec_axis( trans=~./m, name="Variant Proportion"),
                     labels = comma) +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by Version in Michigan"),
       color = "SARS-CoV-2 Variant",
       x="Date") +
  guides(color = guide_legend(override.aes = list(linewidth =3),
                              ncol=2,title.position="top")) +
 # scale_color_brewer(palette="Accent") 
  scale_color_manual(values=varpal) +
  scale_fill_manual(values=pal)

ggsave(here('figures/michigan_variant.pdf'), dpi=400)

Comparing Overlap

Weaknesses in this approach:

  • it doesn’t distinguish between a very small interval being covered (i.e. when there are very few cases) and a larger interval being covered (i.e. when there are many cases)
  • it doesn’t consider the width of the PB interval
state_overlaps <- subset %>%
  select(-date) %>%
  distinct() %>%
  mutate(lower_overlap = map2_dbl(infections.lo, exp_cases_lb, ~max(.x,.y)),
         upper_overlap = map2_dbl(infections.hi, exp_cases_ub, ~min(.x,.y)),
         overlap = case_when(
           upper_overlap-lower_overlap > 0 ~   upper_overlap-lower_overlap,
           TRUE ~ 0
         )) %>%
  select(contains('overlap'), name, biweek, 
         population, positive, total, exp_cases_lb,
         exp_cases_ub, contains('infections')) %>%
  mutate(overlap_not_norm = overlap,
         overlap = overlap/population,
         fraction_overlap = overlap_not_norm/(infections.hi-infections.lo)) 



#############
# BY STATE
#############
state_overlaps %>%
  group_by(name) %>%
  summarize(mean_overlap=mean(fraction_overlap)) %>%
  ggplot(aes(y= fct_reorder(name, mean_overlap),
             x =  mean_overlap)) +
  geom_bar(stat="identity") +
  theme_c() +
  labs(y="",
       x="Mean Overlap",
       title = "Mean Overlap with Covidestim by State",
       subtitle = "Overlap Normalized by Width of Covidestim Interval")

##########################
# BY TWO-WEEK INTERVAL
##########################
state_overlaps %>%
  group_by(biweek) %>%
  summarize(mean_overlap=mean(fraction_overlap)) %>%
  left_join(dates) %>%
  group_by(biweek) %>%
  summarize(date = median(date),
            mean_overlap=unique(mean_overlap)) %>%
  ggplot(aes(x=date,
             y =  mean_overlap)) +
  geom_bar(stat="identity", alpha =.7) +
  geom_vline(xintercept=end_old_model,linetype=2,
             color='darkred',
             linewidth=1.1) +
  theme_c() +
  labs(y="",
       x="Mean Overlap",
       title = "Mean Overlap with Covidestim by Time Interval",
       subtitle = "Overlap Normalized by Width of Covidestim Interval") +
  scale_x_date(date_breaks="2 months", date_labels="%b %Y")

##########################
# BY TWO-WEEK INTERVAL
##########################
# state_overlaps %>%
#   mutate(posrate=positive/total) %>%
#   filter(name %in% sample(unique(state_overlaps$name), 5)) %>%
#   ggplot(aes(x=posrate, y = fraction_overlap)) +
#   geom_point(alpha=.8) +
#   theme_c() +
#   # facet_wrap(~name, scales="free_y") +
#   labs(x = "Positivity Rate", y = "Overlap") +
#   scale_y_continuous(labels=comma)
set.seed(123)
state_sample <- sample(unique(state_overlaps$name), 16)

state_overlaps %>%
  mutate(posrate=positive/total) %>%
 # filter(name %in% state_sample) %>%
  ggplot(aes(x=posrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
  facet_wrap(~name, scales="free_y") +
  labs(x = "Positivity Rate", y = "Overlap",
       title = "Comparing Fraction of Interval Covered by Positivity Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

state_overlaps %>%
  mutate(posrate=positive/total) %>%
#  filter(name %in% state_sample) %>%
  ggplot(aes(x=posrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
 # facet_wrap(~name, scales="free_y") +
  labs(x = "Positivity Rate", y = "Overlap",
       title = "Comparing Fraction of Interval Covered by Positivity Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

state_overlaps %>%
  mutate(testrate=total/population) %>%
  filter(name %in% state_sample) %>%
  ggplot(aes(x=testrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
  facet_wrap(~name, scales="free") +
  labs( x= "Total Tests / Population Size", 
        y = "Fraction of Covidestim Interval\nOverlapping with the PB interval",
        title = "Comparing Fraction of Interval Covered by Testing Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

state_overlaps %>%
  mutate(testrate=total/population) %>%
  filter(name %in% state_sample) %>%
  ggplot(aes(x=testrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
  labs( x= "Total Tests / Population Size", 
        y = "Fraction of Covidestim Interval\nOverlapping with the PB interval",
        title = "Comparing Fraction of Interval Covered by Testing Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

Relationship Between Agreement and Cumulative Incidence (Pre-Omicron)

state_population_link <- "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/state/detail/SCPRC-EST2019-18+POP-RES.csv"
state_pop <- read_csv(state_population_link)
statecodes <- read_csv(here("data/demographic/statecodes.csv"))
  
state_pop <- state_pop %>%
    left_join(statecodes, by = c("NAME" = "state")) %>%
    select(population = POPESTIMATE2019,
           state = code,
           name = NAME) %>%
    filter(!is.na(state))
  
state_deaths <- tar_read(state_deaths, store=targets_dir)


set.seed(123)
state_sample <- sample(unique(state_deaths$state), 5)

state_deaths %>%
#  filter(state %in% state_sample) %>%
  ggplot(aes(x=date,y=deaths/cases)) +
  geom_line() +
  facet_wrap(~state) +
  theme_c() +
  geom_vline(xintercept=end_old_model, color='darkred', linetype=2)

comp_deaths <- state_deaths %>%
  mutate(omicron=ifelse(date <= mdy("12-02-2021"),
                        "pre_omicron", "omicron")) %>%
  group_by(state, omicron) %>%
  summarize(cumulative_deaths = sum(deaths),
            cumulative_cases= sum(cases)) %>%
  pivot_wider(names_from = omicron, 
              values_from = c(cumulative_deaths, cumulative_cases)) %>%
  mutate(pre_deaths_over_cases = cumulative_deaths_pre_omicron/cumulative_cases_pre_omicron,
         omicron_deaths_over_cases = cumulative_deaths_omicron/cumulative_cases_omicron) 



comp_deaths %>%
  select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
  pivot_longer(contains("over_cases")) %>%
  group_by(state) %>%
  mutate(pre = value[which(name=="pre_deaths_over_cases")]) %>%
  mutate(name = case_when(
    grepl("omicron", name) ~ "Omicron",
    grepl("pre", name) ~ "Before Omicron"
  )) %>%
  ggplot(aes(y=fct_reorder(state,pre),  x= value, fill=name)) +
  geom_bar(stat='identity', position='dodge') +
  theme_c(legend.title = element_text(face="bold", size=14)) +
  labs(x = TeX("$\\frac{Total\\,Deaths}{Total\\,Cases}$"),
       y="",
       fill = "Time Period",
       title = "Total Deaths Over Total Cases in the Time Period Before Omicron\n Compared to During Omicron") +
  scale_x_continuous(n.breaks=7)

comp_deaths %>%
  select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
  mutate(pct_change =( omicron_deaths_over_cases -pre_deaths_over_cases ) / pre_deaths_over_cases ) %>% 
  ggplot(aes(y=fct_reorder(state,pct_change),  x= pct_change)) +
  geom_bar(stat='identity', position='dodge') +
  theme_c(legend.title = element_text(face="bold", size=11),
          axis.text.x=element_text(size=14),
          plot.subtitle = element_text(size=16,
                                       face="plain", 
                                       margin=margin(0,0,4,0))) +
  labs(x = TeX("$\\frac{Total\\,Deaths}{Total\\,Cases}$"),
       y="",
       fill = "Time Period",
       title = TeX("Percent Change in $\\frac{Total\\,Deaths}{Total\\,Cases}$ from the Time Period Before Omicron"),
       subtitle="Compared to During Omicron") +
  scale_x_continuous(labels=scales::percent)

comp_deaths %>%
  select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
  mutate(pct_change =( omicron_deaths_over_cases - pre_deaths_over_cases ) / pre_deaths_over_cases ) %>%
  left_join(state_overlaps, by = c('state' = 'name')) %>%
  group_by(state) %>%
  summarize(mean_overlap=mean(fraction_overlap),
            pct_change=unique(pct_change)) %>%
  ggplot(aes(x=pct_change,y=mean_overlap)) +
  geom_point(size=2) +
  theme_c(  plot.subtitle = element_text(size=16,
                                       face="plain", 
                                       margin=margin(0,0,4,0))) +
  scale_x_continuous(labels=scales::percent) +
  labs(x="Percent Change", y = "Mean Overlap\n(By State)", 
        title = TeX("Relationship Between the Percent Change in $\\frac{Total\\,Deaths}{Total\\,Cases}$ from the Time Period Before Omicron"),
       subtitle="Compared to During Omicron versus the Mean Overlap")

comp_deaths %>%
  rename(name=state) %>%
  ungroup() %>%
  left_join(state_pop[,c("name", "population")]) %>%
  mutate(frac_infected = cumulative_cases_pre_omicron/population) %>%
  left_join(state_overlaps[,c("name","fraction_overlap", "biweek")]) %>%
  filter(biweek >= 25) %>%
  group_by(name) %>%
  summarize(mean_overlap=mean(fraction_overlap),
            frac_infected=unique(frac_infected)) %>%
  ggplot(aes(x=frac_infected, y =mean_overlap)) +
  geom_point() +
  theme_c()

covidestim_link <- "https://covidestim.s3.us-east-2.amazonaws.com/latest/state/estimates.csv"
  
covidestim <- read_csv(covidestim_link)
  
# join data from each source to include dates before and after 2021-12-02
covidestim <- covidestim %>%
    select(date, contains("infections"), state) %>%
    filter(year(date) > 2020) %>%
    mutate(week = week(date), year = year(date))


covidestim %>%
  group_by(state) %>%
  summarize(infections=sum(infections))%>%
  rename(name=state) %>%
  left_join(state_pop[,c("name", "population")]) %>%
  mutate(frac_infected = infections/population) %>%
  left_join(state_overlaps[,c("name","fraction_overlap", "biweek")]) %>%
  filter(biweek >= 25) %>%
  group_by(name) %>%
  summarize(mean_overlap=mean(fraction_overlap),
            frac_infected=unique(frac_infected)) %>%
  ggplot(aes(x=frac_infected, y =mean_overlap)) +
  geom_point() +
  theme_c()